prototype sets refer to sets of 3 prototypes that correspond to a single participant in the Hu & Nosofsky 2024 study. prototype pairs refer to the specific pairs of prototypes that are displayed together on a rating trial (3 pairs per set).
# d |> filter(sbjCode==11) |> select(sbjCode,date,trial,pair_label,set,rt,time_elapsed,time)# d |> group_by(sbjCode,set) |> # summarize (n=n()) |># gt()#d |> group_by(sbjCode, item_label_1, item_label_2) |> summarise(n=n())# (1-.33)^8# (factorial(8)/(factorial(6)*factorial(8-6))) * (.33^6)*((1-.33)^(8-6))# (factorial(8)/(factorial(7)*factorial(8-7))) *(.33^6)*((1-.33)^(8-7))# (factorial(8)/(factorial(8)*factorial(8-8))) *(.33^6)*(1-.33)^(8-8)# d |> pivot_longer(cols=c(item_label_1, item_label_2), names_to="item_label", values_to="item") |> # group_by(sbjCode, item) |> summarise(n=n())# patternCounts <- d |> pivot_longer(cols=c(item_label_1, item_label_2), names_to="item_label", values_to="item") |> # group_by(item) |> summarise(n=n(),resp=mean(response),sd=sd(response)) |> arrange(desc(n))# d |> # pivot_longer(cols=c(item_label_1, item_label_2), names_to="item_label", values_to="item") |> select(sbjCode,set,pair_label,item_label,item,response) |> group_by(set) |># summarize(n=n_distinct(sbjCode)) |> arrange(desc(n)) # d |> group_by(sbjCode, file) |> summarise(n=n())# d |> group_by(sbjCode, set) |> summarise(n=n())# d |> group_by(sbjCode) |> summarise(n_distinct(file))# d |> group_by(sbjCode) |> summarise(n_distinct(set))sp<-setCounts2|>mutate(set=reorder(set,n))|>ggplot(aes(x=set,y=n))+geom_col()+theme(legend.title=element_blank(), axis.text.x =element_text(size=5,angle =90, hjust =0.5, vjust =0.5))+labs(x="Prototype Set", y="Number of Participants to rate set")sh<-setCounts2|>ggplot(aes(x=n))+geom_histogram(binwidth =1)+scale_x_continuous(breaks=seq(0, max(setCounts2$n), by =1))+geom_text(stat="count", aes(label=..count..), vjust=-0.5)+labs(x="Number of times prototype set has been included in the study", y="Number of prototype sets for each count")sp/sh
Prototype set counts
Code
setCounts2|>group_by(n)|>summarise(nc=n())|>rename("Number of times prototype set has been included in the study"=n, "Number of prototype sets with this count"=nc)|>gt()|>tab_spanner(label ="Prototype Set Counts")|>tab_header(title ="Prototype Set Counts")|>tab_source_note("Note: The number of times a prototype set has been included in the study is the number of participants who rated the set.")
Prototype Set Counts
Number of times prototype set has been included in the study
Number of prototype sets with this count
3
3
4
7
5
8
6
17
7
30
8
37
9
44
10
48
11
44
12
20
13
18
14
17
15
5
16
5
17
1
Note: The number of times a prototype set has been included in the study is the number of participants who rated the set.
Rating Distributions
Code
pgr<-d|>ggplot(aes(x=response))+geom_histogram(binwidth=1)+scale_x_continuous(breaks=seq(1, 9, by =1))+coord_cartesian(xlim =c(1, 9))+labs(title="Aggregate Rating Distribution", x="Rating", y="Count")pir<-d|>ggplot(aes(x=response))+geom_histogram(binwidth=1)+facet_wrap(~sbjCode)+scale_x_continuous(breaks=seq(1, 9, by =1))+coord_cartesian(xlim =c(1, 9))+labs(title="Rating Distribution per Sbj.", x="Rating", y="Count")pgr/pir
Rating distributions
Reaction Time Distributions
Code
prtg<-d|>ggplot(aes(x=rt))+geom_density()+labs(title="Aggregate Reaction Time Distribution", x="Reaction Time (ms)", y="Density")prtid<-d|>ggplot(aes(x=rt))+geom_density()+facet_wrap(~sbjCode,scale="free_x")+labs(title="Reaction Time Distribution per Sbj.", x="Reaction Time (ms)", y="Density")prtg/prtid
d%>%filter(pair_label%in%{pairCounts|>filter(n>=min_resp)|>slice_max(mean_resp,n=n_show, with_ties=FALSE)|>pull(pair_label)})|>group_by(pair_label)|>slice_head(n=1)%>%plot_dotsAll()+plot_annotation(title=glue::glue("Highest rated pairs ( out of sets with n>={min_resp} ratings)"), theme =theme(plot.title =element_text(hjust =0.4)))
All pairs with >=10 ratings
click on column headers to change sort order
e.g. clicking on “Mean Rating” will toggle showing the pairs rated most similar or most dissimilar
clicking on “SD” will toggle showing the pairs with the most or least agreement in ratings
Assess # of patterns in various binnings - e.g. quartile, decile
Code
patternAvg<-d|>pivot_longer(cols=c(item_label_1, item_label_2), names_to="item_label", values_to="item")|>group_by(item,file)|>summarise(n_rating=n(),resp=mean(response),sd=sd(response))|>arrange(desc(n_rating))cat_sim<-sbj_cat|>mutate(item=item_label)|>left_join(patternAvg,by=c("file","item"))|>arrange(desc(n_rating))|>#remove rows where n_rating is NA, or less than 4filter(!is.na(n_rating),n_rating>=12)|>mutate(sim_group =ifelse(resp>6.0,"Very Similar",ifelse(resp<3.5,"Very Dissimilar","Medium")))|>mutate(sim_group=factor(sim_group,levels=c("Very Dissimilar","Medium","Very Similar")))# bin data by rating (resp) into quartilest1<-cat_sim|>mutate(Quartile =ntile(resp, 5))|>group_by(Quartile)|>summarize("Avg. Similarity Rating"=mean(resp),sd=sd(resp),n_ratings=n_distinct(file), .groups="drop")t2<-cat_sim|>mutate(Decile =ntile(resp, 10))|>group_by(Decile)|>summarize("Avg. Similarity Rating"=mean(resp),sd=sd(resp),n_ratings=n_distinct(file), .groups="drop")t3<-cat_sim|>group_by(sim_group)|>summarize("Avg. Similarity Rating"=mean(resp),sd=sd(resp),n_ratings=n_distinct(file), .groups="drop")t1|>kbl(caption="Quartiles")
Quartiles
Quartile
Avg. Similarity Rating
sd
n_ratings
1
3.4
0.53
106
2
4.3
0.17
125
3
4.8
0.13
140
4
5.3
0.14
133
5
6.0
0.39
103
Code
t2|>kbl(caption="Deciles")
Deciles
Decile
Avg. Similarity Rating
sd
n_ratings
1
3.0
0.46
56
2
3.8
0.13
69
3
4.2
0.10
70
4
4.5
0.08
71
5
4.7
0.07
78
6
5.0
0.07
78
7
5.2
0.07
76
8
5.4
0.07
75
9
5.7
0.11
69
10
6.3
0.34
58
Code
t3|>kbl(caption="Extreme Groups")
Extreme Groups
sim_group
Avg. Similarity Rating
sd
n_ratings
Very Dissimilar
2.9
0.45
50
Medium
4.8
0.64
270
Very Similar
6.4
0.34
52
Combined Category Testing Performance with Similarity Ratings (resp)
Each subject in the 2024 study has a similarity score for each of their 3 categories. (averaged over 2 comparisons with that categories prototype)
The same category similarity scores are then compared to their accuracy for each of the Pattern Tyeps (old, prototype, new low, new med, new high)
Code
cat_sim%>%# round all numerics except sbjCode to 2 decimal placesmutate(across(where(is.numeric), ~round(., 1)))|>select(-id,-sim_group,-item_label)|>relocate(item,file, .after=sd)|>rename("Category Similarity"=resp, "CatLearn Accuracy"=Corr)|>DT::datatable(options =list(pageLength =6))
Code
p3<-cat_sim|>mutate(Quartile =ntile(resp, 4))|>ggplot(aes(x=Quartile,y=Corr,fill=Quartile))+stat_bar+facet_wrap(~Pattern.Type)+labs(y="CatLearn Accuracy", x="Similarity Rating Quintile", title="Effect by Pattern Type")p4<-cat_sim|>mutate(Decile =ntile(resp, 10))|>ggplot(aes(x=Decile,y=Corr,fill=Decile))+stat_bar+facet_wrap(~Pattern.Type)+labs(y="CatLearn Accuracy", x="Similarity Rating Decile", title="Effect by Pattern Type")p5<-cat_sim|>mutate(Quartile =ntile(resp, 4))|>ggplot(aes(x=Quartile,y=Corr,fill=Quartile))+stat_bar+facet_wrap(~condit)+labs(y="CatLearn Accuracy", x="Similarity Rating Quartile", title="Effect by Training Condition")p6<-cat_sim|>mutate(Decile =ntile(resp, 10))|>ggplot(aes(x=Decile,y=Corr,fill=Decile))+stat_bar+facet_wrap(~condit)+labs(y="CatLearn Accuracy", x="Similarity Rating Decile", title="Effect by Training Condition")p7<-cat_sim|>mutate(Quartile =ntile(resp, 4))|>ggplot(aes(x=Quartile,y=Corr,fill=Quartile))+stat_bar+facet_nested_wrap(~condit+Pattern.Type)+labs(y="CatLearn Accuracy", x="Similarity Rating Quintile", title="Effect by Training Condition and Pattern Type")p3+p4
Code
p5+p6
Code
p7
Code
p9<-cat_sim|>ggplot(aes(y=Corr,x=Pattern.Type, fill=sim_group))+stat_bar+labs(title="Group by Pattern Type",y="CatLearn Accuracy")p10<-cat_sim|>ggplot(aes(y=Corr,x=condit, fill=sim_group))+stat_bar+labs(title="Group by Condit",y="CatLearn Accuracy")p9/p10
Code
cat_sim|>ggplot(aes(x=Corr,y=resp))+geom_point(aes(col=Pattern.Type))+geom_smooth(aes(fill=Pattern.Type),method ="lm")+labs(y="Similarity Rating", x="Aggregated Accuracy in CatLearn study")
Code
cat_sim|>ggplot(aes(x=Corr,y=resp))+geom_point(aes(col=Pattern.Type))+geom_smooth(aes(fill=Pattern.Type),method ="lm")+facet_wrap(~condit)+labs(y="Similarity Rating", x="Aggregated Accuracy in CatLearn study")